


local file_path_in "/tochange/"

****************************************************************************************
* create dataset of builder house sales 
****************************************************************************************

timer clear 1
timer on 1
set more off

use "`file_path_in'//county transactions//all_new_transactions.dta", clear

local identifier "APN_UNFORMATTED APN_SEQUENCE_NUMBER FIPS_CODE"
destring MORTGAGE_AMOUNT, gen(mortgage_amt) 

** scrub
cap drop temp
destring FIPS_CODE, replace
destring SALE_AMOUNT, replace

order SELLER_NAME OWNER_BUYER_1_LAST_NAME OWNER_BUYER_1_FIRST_NAME SALE_AMOUNT PROPERTY_CITY PROPERTY_STATE

rename SELLER_NAME seller
rename SALE_AMOUNT saleamount
rename OWNER_BUYER_1_LAST_NAME buyerlastname1 
rename OWNER_BUYER_1_FIRST_NAME buyerfirstname1
rename OWNER_BUYER_2_LAST_NAME buyerlastname2 
rename OWNER_BUYER_2_FIRST_NAME buyerfirstname2
rename PROPERTY_ZIPCODE zipcode
rename YEAR_BUILT yearbuilt

encode MORTGAGE_LOAN_TYPE_CODE, gen(mortgage_type)
rename MORTGAGE_INTEREST_RATE mortgage_rate
rename MORTGAGE_TERM mortgage_term
replace mortgage_rate = . if mortgage_rate==0  & mortgage_amt!=0
gen census_tract = substr(CENSUS_TRACT,1,6) 
rename CENSUS_TRACT census_block  
destring census_tract, replace 
destring census_block, replace 
drop if saleamount<10000
gen ltv = mortgage_amt / saleamount
replace ltv = . if ltv>2

gen zip  = substr(zipcode,1,5)
gen mail_zip  = substr(MAIL_ZIPCODE,1,5)

** get rid of cases where date is "0"
tostring RECORDING_DATE, replace
gen record1 = substr(RECORDING_DATE,1,6)
gen record2 = substr(RECORDING_DATE,7,8)
gen temp = record1+record2
replace temp = record1+"01" if record2=="00"
assert temp==RECORDING_DATE if record2!="00"
drop RECORDING_DATE record1 record2
rename temp RECORDING_DATE

** create date variable
gen year = substr(RECORDING_DATE,1,4)
gen monthdate = substr(RECORDING_DATE,5,6)
gen month = substr(monthdate,1,2)
gen temp = date(RECORDING_DATE,"YMD")
format temp %td
gen date = mofd(temp)
format date  %tm
drop temp monthdate
destring year, replace
destring month, replace
drop if month==0  
destring RECORDING_DATE, gen(recorddate)

** counties which existed in 2000 
gen early = (year==2000)
bysort FIPS_CODE: egen intemp = max(early)
replace intemp=0 if mi(intemp)

** create quarter variable 
gen quarter = 1 if inlist(month,1,2,3)
replace quarter = 2 if inlist(month,4,5,6)
replace quarter = 3 if inlist(month,7,8,9)
replace quarter = 4 if inlist(month,10,11,12)

gen length = length(APN_UNFORMATTED)
su length, meanonly
local lmax = `r(max)'
gen numstr = ""
gen letterstr = ""
forval i = 1/`lmax'{
	replace numstr = numstr + substr(APN_UNFORMATTED,`i',1) if inrange(real(substr(APN_UNFORMATTED,`i',1)),0,9)
	replace letterstr = letterstr + substr(APN_UNFORMATTED,`i',1) if !inrange(real(substr(APN_UNFORMATTED,`i',1)),0,9)
}
rename APN_UNFORMATTED apnold
rename numstr APN_UNFORMATTED
destring APN_UNFORMATTED, replace
format APN_UNFORMATTED %15.0g

* get rid of duplicates in which seller, buyer, APN, and sale amount is same
duplicates tag seller buyerlastname1 buyerfirstname1 saleamount `identifier', gen(ndups) 
drop if ndups>0
drop ndups

* scrub controls
replace TOTAL_BATHS_CALCULATED = . if TOTAL_BATHS_CALCULATED==0
replace TOTAL_ROOMS = . if TOTAL_ROOMS==0

* tsset house, date  (first get rid of duplicates of APN and recorddate) 
duplicates tag `identifier' recorddate, gen(ndups)
drop if ndups!=0  
drop ndups

********************************************* PULL LAND SALES *********************************************  
preserve
	keep if yearbuilt>(year+1) | yearbuilt==. | yearbuilt==0  
	egen saleid = group(seller saleamount recorddate)  
	bysort saleid: gen nlots = _n
	bysort saleid: replace nlots = nlots[_N] /* calculate number of lots in the sale*/
	bysort saleid: gen tracker = _n
	keep if tracker == 1 /* equivalent to collapsing bulk sales to one observation */
	drop tracker
	* correct widespread spelling mistakes or subsidiaries
	do "clean-builder-names.do" 
	do "trim-builder-names.do"  
	drop if mi(seller)  
	gen land_sale = 1
	do "scrub-controls.do" 
	rename seller seller_orig
	rename buyerfirstname1 seller
	do "clean-builder-names.do" 
	do "trim-builder-names.do"
	rename seller buyerfirstname1
	rename seller_orig seller
	gen self_sale = seller == buyerfirstname1   // want to eliminate cases where seller sells to himself
	keep if self_sale==0
	recast str60 seller 
	save  "`file_path_in'//builder county//land_sales.dta", replace
restore

********************************************* RETURN TO HOME SALES ANALYSIS *********************************************  
drop if mi(yearbuilt) | yearbuilt==0
gen land_sale = 0

** drop land purchases and sales
gen presale = 1 if buyerlastname1=="" | strlen(buyerlastname1)==1 
replace presale=0 if mi(presale)
sort `identifier' recorddate
bysort `identifier': gen presaleflag1 = presale[1]  /* identifier if first sale of home is land block */
bysort `identifier' : gen presaleflag2 = presale[2] if presaleflag1==1  
bysort `identifier': gen presaleflag3 = presale[3] if presaleflag2==1  
bysort `identifier': gen presaleflag4 = presale[4] if presaleflag3==1  
bysort `identifier': gen preseller1 = seller if presale==1  
bysort `identifier': replace preseller1 = preseller1[1]
bysort `identifier': gen preseller2 = seller if presale[2]==1  
bysort `identifier': replace preseller2 = preseller2[2]
bysort `identifier': gen presaleamt1 = saleamount if presale==1 
bysort `identifier': replace presaleamt1= presaleamt1[1]
bysort `identifier': gen presaleamt2 = saleamount if presale==1  
bysort `identifier': replace presaleamt2= presaleamt2[2]
sort `identifier' recorddate
bysort `identifier': gen resale = _n  /* what count of resale is this transaction */  

* drop if first sale is to developer
drop if presale==1 & resale==1   /* drop first sale if first sale to land block */
drop if presaleflag1==1 & presale == 1 & resale==2 /* drop second sale if first & second sale to land block. */
drop if presaleflag3==1 & presale == 1 & resale==3 /* drop third sale if first & second & third sale to land block.  */
drop if presaleflag4==1 & presale == 1 & resale==4 /* drop second sale if first & second & third & fourth sale to land block.  */
rename resale oldresale  /* after I've dropped presales, I want to regenerate resale variable */

sort `identifier' recorddate
bysort `identifier': gen resale = _n  /* which count of resale is this transaction */
bysort `identifier': gen totalsales = resale[_N]  /* track how many times house is resold */

** generate quarterly date var
gen qdate = qofd(dofm(date))
format qdate %tq 

** identify time and amount of second and third sales
bysort `identifier': gen famt = saleamount if resale==1
bysort `identifier': egen firstamt = max(famt)
bysort `identifier': gen fqdate = qdate if resale==1
bysort `identifier': egen firstqdate = max(fqdate)

bysort `identifier': gen samt = saleamount if resale==2
bysort `identifier': egen secondamt = max(samt)
bysort `identifier': gen sqdate = qdate if resale==2
bysort `identifier': egen secondqdate = max(sqdate)

bysort `identifier': gen tamt = saleamount if resale==3
bysort `identifier': egen thirdamt = max(tamt)
bysort `identifier': gen tqdate = qdate if resale==3
bysort `identifier': egen thirdqdate = max(tqdate)

gen return2 = ((secondamt - firstamt) / firstamt)
gen return3 = ((thirdamt - firstamt) / firstamt)
gen time2 = secondqdate - firstqdate  
gen time3 = (thirdqdate - firstqdate)
format firstqdate %tq
format secondqdate %tq
format thirdqdate %tq

** calculate annualized return
gen annual_return2 = (1+return2)^(1/(time2/4)) - 1
gen annual_return3 = (1+return3)^(1/(time3/4)) - 1

** pair of dates between first and second sale of home 
tostring firstqdate, gen(fqdatestr)
tostring secondqdate, gen(sqdatestr)
tostring thirdqdate, gen(tqdatestr)
gen pair_date2 = fqdatestr+","+sqdatestr
gen pair_date3 = fqdatestr+","+tqdatestr
replace pair_date2 = "" if secondqdate==.
replace pair_date3 = "" if thirdqdate==.

encode pair_date2 if time2>1 & secondqdate!=., gen(qpair_sales2)
encode pair_date3 if time3>2 & thirdqdate!=., gen(qpair_sales3)

gen nsold = 1 /* create field for how many houses seller has on market in one year (or month) */

********************************************* PULL ALL SALES OF NEW HOMES *********************************************  
gen new_resale = resale==1 & year<=2008  // first sale of home occurs before 2009
bysort `identifier': egen resale_of_new = max(new_resale) 
preserve
	keep if resale_of_new == 1  // these are new homes, whose first sale occurs before 2008 (so they will never be in my final sample)
	save  "`file_path_in'//builder county//resales_of_new_homes.dta", replace
restore	
********************************************* RETURN TO ANALYSIS OF FIRST NEW HOME SALES ONLY *************************
keep if resale==1   /* this is where I drop second sales of homes */
replace seller = "ACTUALPERSON" if resale!=1
	
do "scrub-controls.do" 

* merge in census new housing starts by county
merge m:1 date FIPS_CODE using "`file_path_in'//Census New Housing Starts/census.gov/permits_by_county.dta", 
keep if _merge==1 | _merge==3 
drop _merge

* correct  spelling mistakes or subsidiaries
if resale==1 {
	do "clean-builder-names.do" 
	do "trim-builder-names.do"  
}
drop if mi(seller)  

** filter builders
sort seller date

** merge in data on top 200 builders
merge m:1 seller year using "`file_path_in'/Top Builders/topbuildersovertime.dta"
keep if _merge==1 | _merge==3
gen top200 = 1 if rank!=.
replace top200 = 0 if mi(top200)
drop _merge
bysort seller: egen topbuilder = max(top200)

gen totalsale = saleamount

* get builder dataset, collapse to seller date fips level
preserve		
	collapse (sum) nsold (mean) saleamount (sum) totalsale , by(seller date year FIPS_CODE)

	* identify and drop developers named "99th Street Builders". 
	gen error = 1 if strlen(seller)==1
	gen temp00 = substr(seller,1,1) 
	replace error = 1 if temp00 == "0" | temp00=="1" | temp00=="2" | temp00=="3" | temp00=="4" | temp00=="5" | temp00=="6" | temp00=="7" | temp00=="8" | temp00=="9"
	drop temp00
	replace error=1 if seller=="OWNERRECORD" 
	replace error=0 if mi(error)

	* calculate the number of homes sold per year by the builder
	bysort seller year: gen nsale = sum(nsold)  
	bysort seller year: replace nsale = nsale[_N]  /* total number of homes sold per year by the builder */
	bysort seller : egen mnsale = mean(nsale) /* the average number of homes sold per year by the builder */

	* identify no. counties 
	bysort seller year : gen nfipsyr = _n
	bysort seller year : replace nfipsyr = nfipsyr[_N]
	
	*  identify no. counties 
	bysort seller FIPS_CODE: gen one_fips = _n==1
	bysort seller : replace one_fips = sum(one_fips)
	bysort seller : replace one_fips = one_fips[_N]

	* identify builders with few homes sold per year
	summ mnsale if error==0 & seller!="ACTUALPERSON", d
	local thres `r(p50)'
	gen small = 1 if mnsale<`thres' & !mi(mnsale)
	replace small = 0 if mi(small)
	di "`thres'"
	
	gen nsold_fips = nsold
	gen saleamount_fips = saleamount
	gen totalsale_fips = totalsale
	drop nsold saleamount totalsale
	
	saveold "`file_path_in'//builder county//rawBUILDERall_FIPS.dta", version(13) replace
restore

* get builder dataset, collapse to seller date ZIP level
preserve		
	collapse (sum) nsold (mean) saleamount (sum) totalsale , by(seller date year zip)

	* identify no. zips per year
	bysort seller year : gen nzipsyr = _n
	bysort seller year : replace nzipsyr = nzipsyr[_N]
	
	* identify no. zips 
	bysort seller zip: gen one_zip = _n==1
	bysort seller : replace one_zip = sum(one_zip)
	bysort seller : replace one_zip = one_zip[_N]

	gen nsold_zip = nsold
	gen saleamount_zip = saleamount
	gen totalsale_zip = totalsale
	drop nsold saleamount totalsale
	
	saveold "`file_path_in'//builder county//rawBUILDERall_ZIP.dta", version(13) replace
restore

** get house level dataset, where I only keep the transactions coming from builders that I've filtered
merge m:1 seller FIPS_CODE date using "`file_path_in'//builder county//rawBUILDERall_FIPS.dta", 
assert _merge==3
keep if _merge==3
drop _merge

merge m:1 seller zip date using "`file_path_in'//builder county//rawBUILDERall_ZIP.dta", 
assert _merge==3
keep if _merge==3
drop _merge

** merge builder info to land sales:
preserve
	use "`file_path_in'//builder county//land_sales.dta", clear
	merge m:1 seller FIPS_CODE date using "`file_path_in'//builder county//rawBUILDERall_FIPS.dta", keep(match) nogen
	merge m:1 seller zip date using "`file_path_in'//builder county//rawBUILDERall_ZIP.dta", keep(match) nogen
	save "`file_path_in'//builder county//land_sales_bldrs.dta", replace
restore	

************************************************************************************************ 
** identify non owner occupant sales
************************************************************************************************
gen non_owner1 = (mail_zip!=zip) & (MAIL_STREET_NAME!=PROPERTY_STREET_NAME) & (mail_zip!="") & (MAIL_STREET_NAME!="") & (PROPERTY_STREET_NAME!="") & (zip!="")
replace non_owner1 = 0 if non_owner1==.

gen non_owner2 = ABSENTEE_INDICATOR_CODE=="A"
replace non_owner2 = 0 if non_owner2==.

************************************************************************************************ 
** identify mortgages on sales made by builders
************************************************************************************************
gen lender = LENDER_LAST_NAME 
do "clean-lender-names.do"
local lenderlist "ASHTONWOODS AV BEAZER BROOKFIELDRESIDENTIAL CALATLANTIC CALIFORNIACOASTAL CALPROP CAPITALPACIFIC CENTEX CENTURYCOMMUNITIES COMSTOCK CROSSMANN DOMINION DRHORTON ENGEL HORIZON HOVNANIAN KB KIMBALLHILL LENNAR LGI M/I MDC MERITAGE NVR ORIOLE ORLEANS PULTE REGENCY RYLAND SCHULER SHEA TAYLORMORRISON TOLLBROTHERS TRIPOINTE UCP WCICOMMUNITIES WEEKLEY WILLIAMLYON WOODSIDE"

foreach c of local lenderlist {
	display "`c'"
	if "`c'" != "AVATAR" {
		replace lender = "`c'" if regexm(lender,"`c'")==1 
	}
}
gen lender_start=0
foreach c of local lenderlist {
	di "`c'"
	replace lender_start = 1 if lender=="`c'"
}
gen hbloan = 1 if seller==lender & lender_start==1
replace hbloan = 0 if mi(hbloan)

************************************************************************************************************************************   
** track whether a builder survives til 2014 
************************************************************************************************************************************ 
sort seller year
bysort seller (year): gen last_sale = year[_N]  /* last_sale==2014 means they survived til the end of the sample */

save "`file_path_in'//house county//rawHOUSEBUILDERall.dta", replace

timer off 1
timer list 1	

























